In [ ]:
# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
In this lab, you will attempt to find an improvement on a mini-ResNet for CIFAR-10.
Below is a composable "class" based version for building ResNet networks. Spend a few moments looking at the structure and get familiar.
In [ ]:
import tensorflow as tf
from tensorflow.keras import Model, Input
from tensorflow.keras.layers import Conv2D, MaxPooling2D, ZeroPadding2D, BatchNormalization, ReLU
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Add
class ResNetV2(object):
""" Construct a Residual Convolution Network Network V2 """
# Meta-parameter: list of groups: number of filters and number of blocks
groups = { 50 : [ (64, 3), (128, 4), (256, 6), (512, 3) ], # ResNet50
101: [ (64, 3), (128, 4), (256, 23), (512, 3) ], # ResNet101
152: [ (64, 3), (128, 8), (256, 36), (512, 3) ] # ResNet152
}
_model = None
init_weights = 'he_normal'
def __init__(self, n_layers, input_shape=(224, 224, 3), n_classes=1000):
""" Construct a Residual Convolutional Neural Network V2
n_layers : number of layers
input_shape: input shape
n_classes : number of output classes
"""
if n_layers not in [50, 101, 152]:
raise Exception("ResNet: Invalid value for n_layers")
# The input tensor
inputs = Input(input_shape)
# The stem convolutional group
x = self.stem(inputs)
# The learner
x = self.learner(x, self.groups[n_layers])
# The classifier for 1000 classes
outputs = self.classifier(x, n_classes)
# Instantiate the Model
self._model = Model(inputs, outputs)
@property
def model(self):
return self._model
@model.setter
def model(self, _model):
self._model = _model
def stem(self, inputs):
""" Construct the Stem Convolutional Group
inputs : the input vector
"""
# The 224x224 images are zero padded (black - no signal) to be 230x230 images prior to the first convolution
x = ZeroPadding2D(padding=(3, 3))(inputs)
# First Convolutional layer uses large (coarse) filter
x = Conv2D(64, (7, 7), strides=(2, 2), padding='valid', use_bias=False, kernel_initializer=self.init_weights)(x)
x = BatchNormalization()(x)
x = ReLU()(x)
# Pooled feature maps will be reduced by 75%
x = ZeroPadding2D(padding=(1, 1))(x)
x = MaxPooling2D((3, 3), strides=(2, 2))(x)
return x
def learner(self, x, groups):
""" Construct the Learner
x : input to the learner
groups: list of groups: number of filters and blocks
"""
# First Residual Block Group (not strided)
n_filters, n_blocks = groups.pop(0)
x = ResNetV2.group(x, n_filters, n_blocks, strides=(1, 1))
# Remaining Residual Block Groups (strided)
for n_filters, n_blocks in groups:
x = ResNetV2.group(x, n_filters, n_blocks)
return x
@staticmethod
def group(x, n_filters, n_blocks, strides=(2, 2), init_weights=None):
""" Construct a Residual Group
x : input into the group
n_filters : number of filters for the group
n_blocks : number of residual blocks with identity link
strides : whether the projection block is a strided convolution
"""
# Double the size of filters to fit the first Residual Group
x = ResNetV2.projection_block(x, n_filters, strides=strides, init_weights=init_weights)
# Identity residual blocks
for _ in range(n_blocks):
x = ResNetV2.identity_block(x, n_filters, init_weights=init_weights)
return x
@staticmethod
def identity_block(x, n_filters, init_weights=None):
""" Construct a Bottleneck Residual Block with Identity Link
x : input into the block
n_filters: number of filters
"""
if init_weights is None:
init_weights = ResNetV2.init_weights
# Save input vector (feature maps) for the identity link
shortcut = x
## Construct the 1x1, 3x3, 1x1 convolution block
# Dimensionality reduction
x = BatchNormalization()(x)
x = ReLU()(x)
x = Conv2D(n_filters, (1, 1), strides=(1, 1), use_bias=False, kernel_initializer=init_weights)(x)
# Bottleneck layer
x = BatchNormalization()(x)
x = ReLU()(x)
x = Conv2D(n_filters, (3, 3), strides=(1, 1), padding="same", use_bias=False, kernel_initializer=init_weights)(x)
# Dimensionality restoration - increase the number of output filters by 4X
x = BatchNormalization()(x)
x = ReLU()(x)
x = Conv2D(n_filters * 4, (1, 1), strides=(1, 1), use_bias=False, kernel_initializer=init_weights)(x)
# Add the identity link (input) to the output of the residual block
x = Add()([shortcut, x])
return x
@staticmethod
def projection_block(x, n_filters, strides=(2,2), init_weights=None):
""" Construct a Bottleneck Residual Block of Convolutions with Projection Shortcut
Increase the number of filters by 4X
x : input into the block
n_filters: number of filters
strides : whether the first convolution is strided
"""
# Construct the projection shortcut
# Increase filters by 4X to match shape when added to output of block
shortcut = BatchNormalization()(x)
shortcut = Conv2D(4 * n_filters, (1, 1), strides=strides, use_bias=False, kernel_initializer='he_normal')(shortcut)
## Construct the 1x1, 3x3, 1x1 convolution block
# Dimensionality reduction
x = BatchNormalization()(x)
x = ReLU()(x)
x = Conv2D(n_filters, (1, 1), strides=(1,1), use_bias=False, kernel_initializer='he_normal')(x)
# Bottleneck layer
# Feature pooling when strides=(2, 2)
x = BatchNormalization()(x)
x = ReLU()(x)
x = Conv2D(n_filters, (3, 3), strides=strides, padding='same', use_bias=False, kernel_initializer='he_normal')(x)
# Dimensionality restoration - increase the number of filters by 4X
x = BatchNormalization()(x)
x = ReLU()(x)
x = Conv2D(4 * n_filters, (1, 1), strides=(1, 1), use_bias=False, kernel_initializer='he_normal')(x)
# Add the projection shortcut to the output of the residual block
x = Add()([x, shortcut])
return x
def classifier(self, x, n_classes):
""" Construct the Classifier Group
x : input to the classifier
n_classes : number of output classes
"""
# Pool at the end of all the convolutional residual blocks
x = GlobalAveragePooling2D()(x)
# Final Dense Outputting Layer for the outputs
outputs = Dense(n_classes, activation='softmax', kernel_initializer=self.init_weights)(x)
return outputs
# Example
# resnet = ResNetV2(50)
Below is a mini-ResNet I wrote for CIFAR-10. Notice how at the bottleneck layer the feature maps are 3 x 3 (max pooling).
REMOVED for brevity ...
batch_normalization_783 (BatchN (None, 16, 16, 8) 32 add_259[0][0]
__________________________________________________________________________________________________
conv2d_789 (Conv2D) (None, 8, 8, 1024) 262144 re_lu_782[0][0]
__________________________________________________________________________________________________
conv2d_786 (Conv2D) (None, 8, 8, 1024) 8192 batch_normalization_783[0][0]
__________________________________________________________________________________________________
add_260 (Add) (None, 8, 8, 1024) 0 conv2d_789[0][0]
conv2d_786[0][0]
__________________________________________________________________________________________________
batch_normalization_787 (BatchN (None, 8, 8, 1024) 4096 add_260[0][0]
__________________________________________________________________________________________________
re_lu_783 (ReLU) (None, 8, 8, 1024) 0 batch_normalization_787[0][0]
__________________________________________________________________________________________________
conv2d_790 (Conv2D) (None, 8, 8, 256) 262144 re_lu_783[0][0]
__________________________________________________________________________________________________
batch_normalization_788 (BatchN (None, 8, 8, 256) 1024 conv2d_790[0][0]
__________________________________________________________________________________________________
re_lu_784 (ReLU) (None, 8, 8, 256) 0 batch_normalization_788[0][0]
__________________________________________________________________________________________________
conv2d_791 (Conv2D) (None, 8, 8, 256) 589824 re_lu_784[0][0]
__________________________________________________________________________________________________
batch_normalization_789 (BatchN (None, 8, 8, 256) 1024 conv2d_791[0][0]
__________________________________________________________________________________________________
re_lu_785 (ReLU) (None, 8, 8, 256) 0 batch_normalization_789[0][0]
__________________________________________________________________________________________________
conv2d_792 (Conv2D) (None, 8, 8, 1024) 262144 re_lu_785[0][0]
__________________________________________________________________________________________________
add_261 (Add) (None, 8, 8, 1024) 0 add_260[0][0]
conv2d_792[0][0]
__________________________________________________________________________________________________
flatten_1 (Flatten) (None, 65536) 0 add_261[0][0]
__________________________________________________________________________________________________
dense_1 (Dense) (None, 10) 655370 flatten_1[0][0]
==================================================================================================
Total params: 2,656,334
Trainable params: 2,648,998
Non-trainable params: 7,336
Below is the results for training for 10 epochs.
Train on 45000 samples, validate on 5000 samples
Epoch 1/10
45000/45000 [==============================] - 1229s 27ms/sample - loss: 4.3040 - acc: 0.1834 - val_loss: 2.1594 - val_acc: 0.2208
Epoch 2/10
45000/45000 [==============================] - 1029s 23ms/sample - loss: 2.0595 - acc: 0.2479 - val_loss: 1.9784 - val_acc: 0.2804
Epoch 3/10
45000/45000 [==============================] - 1144s 25ms/sample - loss: 1.9655 - acc: 0.2876 - val_loss: 1.9719 - val_acc: 0.2832
Epoch 4/10
45000/45000 [==============================] - 1149s 25ms/sample - loss: 1.8521 - acc: 0.3316 - val_loss: 1.7835 - val_acc: 0.3534
Epoch 5/10
45000/45000 [==============================] - 1227s 27ms/sample - loss: 1.7317 - acc: 0.3791 - val_loss: 1.7436 - val_acc: 0.3712
Epoch 6/10
45000/45000 [==============================] - 1138s 25ms/sample - loss: 1.6158 - acc: 0.4204 - val_loss: 1.6352 - val_acc: 0.4106
Epoch 7/10
45000/45000 [==============================] - 1570s 35ms/sample - loss: 1.4964 - acc: 0.4667 - val_loss: 1.4699 - val_acc: 0.4772
Epoch 8/10
45000/45000 [==============================] - 1148s 26ms/sample - loss: 1.3796 - acc: 0.5071 - val_loss: 1.3872 - val_acc: 0.5066
Epoch 9/10
45000/45000 [==============================] - 1189s 26ms/sample - loss: 1.2626 - acc: 0.5513 - val_loss: 1.3557 - val_acc: 0.5160
Epoch 10/10
45000/45000 [==============================] - 1134s 25ms/sample - loss: 1.1348 - acc: 0.6020 - val_loss: 1.3638 - val_acc: 0.5306
How could we improve this?
Perhaps adding regularization (dropout) and replace Flatten with GlobalAverage2D?
Perhaps reduce the number of filters and add another ResNet group?
Perhaps andcraft a different configuration for the second group?
Perhaps concatenate the output from the stem convolution to the output of the last group (but you will have to make the feature maps the same size)?
Think of your own idea?
If this is a classroom, we will split into 4 teams and each team will use a different approach.
In [ ]:
# Make mini-ResNetV1 for CIFAR-10
from tensorflow.keras import Input, Model
from tensorflow.keras.layers import Conv2D, Flatten, Dense
# Stem
inputs = Input((32, 32, 3))
x = Conv2D(32, (3, 3), strides=1, padding='same', activation='relu')(inputs)
# Learner
# Residual group: 2 blocks, 128 filters
# Residual block with projection, 256 filters
# Residual block with identity, 256 filters
x = ResNetV2.group(x, 2, 128)
x = ResNetV2.projection_block(x, 256)
x = ResNetV2.identity_block(x, 256)
# Classifier
x = Flatten()(x)
outputs = Dense(10, activation='softmax')(x)
model = Model(inputs, outputs)
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['acc'])
model.summary()
In [ ]:
from tensorflow.keras.datasets import cifar10
import numpy as np
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
x_train = (x_train / 255.0).astype(np.float32)
x_test = (x_test / 255.0).astype(np.float32)
model.fit(x_train, y_train, epochs=10, batch_size=32, validation_split=0.1, verbose=1)
In [ ]: